1 Data description

Download CDC NHANES 2011-2014 data on demographics (DEMO) and cognitive function (CFQ) from University of Michigan dementia data set: https://www.openicpsr.org/openicpsr/project/151621/version/V1/view. Introduction video and dataset description: https://capra.med.umich.edu/nhanes.html.

2 Descriptive analysis

#load("~/Dropbox/AD/NHANES/analysis/nhanes.RData")
sumtable(data,group="year")
Summary Statistics
year
2011-2012
2013-2014
Variable N Mean SD N Mean SD
SEQN 9756 67038 2816 10175 78644 2937
RIDSTATR 9756 2 0.2 10175 2 0.19
RIAGENDR 9756 1.5 0.5 10175 1.5 0.5
female 9756 0.5 0.5 10175 0.51 0.5
RIDAGEYR 9756 31 25 10175 31 24
age_cat 1791 1.7 0.79 1841 1.7 0.77
RIDRETH1 9756 3.2 1.3 10175 3.1 1.3
race 9756 2.3 1.1 10175 2.2 1.1
RIDRETH3 9756 3.4 1.6 10175 3.3 1.6
DMDEDUC2 5560 3.5 1.3 5769 3.5 1.2
edu_cat 5555 2.6 1.1 5762 2.6 1.1
cfq_present 9756 0.17 0.38 10175 0.18 0.38
CFASTAT 1687 1.6 1.4 1785 1.3 1
CFDCCS 1510 1.1 0.34 1686 1 0.13
CFDCST1 1467 4.3 1.8 1681 4.8 1.7
CFDCST2 1463 6.3 1.9 1678 6.8 2
CFDCST3 1458 7.1 1.9 1674 7.7 1.9
CFDCSR 1454 5.4 2.4 1672 6.1 2.4
CFDAST 1449 16 5.6 1661 16 5.6
CFDDS 1422 45 18 1592 46 17
cerad_sum 1451 23 6.8 1672 25 6.9
z_cerad_re 1451 -0.17 0.98 1672 0.15 0.99
z_animal_re 1449 -0.0024 1 1661 0.0021 1
z_digit_re 1422 -0.014 1 1592 0.013 0.99
z_delayed_re 1454 -0.15 0.99 1672 0.13 0.99
z_global_re 1361 -0.068 2.4 1573 0.27 2.3
low_cerad_re 1451 0.3 0.46 1672 0.2 0.4
low_animal_re 1449 0.24 0.42 1661 0.24 0.43
low_digit_re 1422 0.25 0.44 1592 0.23 0.42
low_delayed_re 1454 0.27 0.45 1672 0.19 0.39
low_global_re 1361 0.28 0.45 1573 0.23 0.42
z_cerad_edu 1449 -0.17 0.97 1670 0.15 1
z_animal_edu 1447 -0.013 1 1659 0.011 1
z_digit_edu 1420 -0.0063 0.99 1591 0.0056 1
z_delayed_edu 1452 -0.14 0.98 1670 0.12 1
z_global_edu 1359 -0.077 2.2 1572 0.26 2.3
low_cerad_edu 1449 0.28 0.45 1670 0.2 0.4
low_animal_edu 1447 0.24 0.42 1659 0.24 0.43
low_digit_edu 1420 0.25 0.43 1591 0.25 0.43
low_delayed_edu 1452 0.28 0.45 1670 0.2 0.4
low_global_edu 1359 0.27 0.44 1572 0.24 0.42
z_cerad_age 1451 -0.19 0.97 1672 0.16 1
z_animal_age 1449 -0.028 1 1661 0.024 1
z_digit_age 1422 -0.039 1 1592 0.035 0.99
z_delayed_age 1454 -0.16 0.98 1672 0.14 1
z_global_age 1361 -0.14 2.3 1573 0.33 2.3
low_cerad_age 1451 0.3 0.46 1672 0.19 0.39
low_animal_age 1449 0.25 0.43 1661 0.25 0.43
low_digit_age 1422 0.26 0.44 1592 0.23 0.42
low_delayed_age 1454 0.27 0.44 1672 0.19 0.39
low_global_age 1361 0.28 0.45 1573 0.23 0.42
WTINT2YR 9756 15713 17031 10175 15293 13474
WTMEC2YR 9756 15713 17600 10175 15293 13971
SDMVPSU 9756 1.6 0.64 10175 1.5 0.5
SDMVSTRA 9756 96 4 10175 111 4.3

2.1 Histograms of year 2011-2012

Age: RIDAGEYR; Cognitive measures: CFDDS - Digital Symbol Score, cerad_sum - Sum of Word Learning and Delayed Recall Score, CFDAST - Animal Fluency Score, race-adjusted z scores of Digital Symbol and global scores

#Digital symbol, Sum of 4 CERAD (3 learning + 1 recall), animal fluency; scores and age-adjusted scores
scores<-data[data$year=="2011-2012",c("RIDAGEYR","CFDDS","cerad_sum","CFDAST","z_digit_re", "z_global_re")]
hist(scores, main="Histogram of year 2011-2012")

2.2 Histograms of year 2013-2014

scores<-data[data$year=="2013-2014",c("RIDAGEYR","CFDDS","cerad_sum","CFDAST","z_digit_re", "z_global_re")]
hist.data.frame(scores, main="Histogram of years 2013-2014")

3 Regression analysis of DS and age

3.1 Regression and scatterplot

temp<-lm(data$CFDDS~data$RIDAGEYR)
summary(temp)
## 
## Call:
## lm(formula = data$CFDDS ~ data$RIDAGEYR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.478 -12.156   0.817  12.058  58.451 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   96.33827    3.13534   30.73   <2e-16 ***
## data$RIDAGEYR -0.73219    0.04491  -16.30   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.72 on 3012 degrees of freedom
##   (16917 observations deleted due to missingness)
## Multiple R-squared:  0.0811, Adjusted R-squared:  0.08079 
## F-statistic: 265.8 on 1 and 3012 DF,  p-value: < 2.2e-16
print(paste("Sample size is: n=", nobs(temp)))
## [1] "Sample size is: n= 3014"
plot(data$RIDAGEYR,data$CFDDS,xlab="Age", ylab="Digital Symbol Scores",main=
"Association Between Age and Digital Symbol Scores",xlim=c(60,80))
abline(temp)

3.2 Interpretation and p-value

As age increases, your score on the Digital Symbol Test decreases. With every one year increase in age, your score decreases by roughly 0.732 points. The p-value of <0.001 indicates a very significant association. However, the r-squared value of roughly 8.1% indicates there are other variables at play in addition to age.

4 Construct biological age

Biological age was compuated for NHANES (references: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10460553/; https://link.springer.com/article/10.1007/s11357-021-00480-5) using R package “BioAge” (https://github.com/dayoonkwon/BioAge).

It measures chronological age where one’s predicted mortality risk (from biomarkers) is the same as a normal person in a reference population. For example, if a person’s biologically predicted mortality is 0.1, and reference population reaches mortality at 0.1 at age 80, then the person’s biological age is 80. If the person’s actual chronogical age is only 70, then the person is at more advanced or older biological state.

#HD using NHANES (separate training for men and women)
library(BioAge)
hd = hd_nhanes(biomarkers=c("albumin","alp","lncrp","totchol","lncreat","hba1c","sbp","bun","uap","lymph","mcv","wbc"))

#KDM bioage using NHANES (separate training for men and women)
kdm = kdm_nhanes(biomarkers=c("albumin","alp","lncrp","totchol","lncreat","hba1c","sbp","bun","uap","lymph","mcv","wbc"))

#phenoage using NHANES
phenoage = phenoage_nhanes(biomarkers=c("albumin_gL","alp","lncrp","totchol","lncreat_umol","hba1c","sbp","bun","uap","lymph","mcv","wbc"))

#assemble NHANES IV dataset with projected biological aging measures for analysis; from Github
data_temp = merge(hd$data, kdm$data) %>% merge(., phenoage$data)

##Discovery cohort: 2011; Validation cohort: 2013
data1<-data_temp
data2<-cbind(substr(data1$sampleID,6,10),data1)
colnames(data2)[1]<-"SEQN"

train = phenoage_calc(NHANES3,
                      biomarkers = c("albumin_gL","lymph","mcv","glucose_mmol",
                      "rdw","creat_umol","lncrp","alp","wbc"))

phenoage = phenoage_calc(data2,
                         biomarkers = c("albumin_gL","lymph","mcv","glucose_mmol",
                         "rdw","creat_umol","alp","wbc"),
                         fit = train$fit)
bioage<-c("phenoage","phenoage_advance","kdm","kdm_advance","hd","hd_log")
chronic<-c("sbp","bmi")
data_use<-data
data_temp1<-merge(data_use,phenoage$data[,c("SEQN",bioage,chronic,"income_recode","grip_scaled","fev_1000")],by="SEQN")
data3<-data_temp1[data_temp1$edu_cat!="NaN",] #remove a few missing edu
data3$edu_cat1<-as.numeric(data3$edu_cat>2) ##combine education categories
data3$race2<-data3$race==2;data3$race3<-data3$race==3;data3$race4<-data3$race==4; #recode race variable
data3$oldage<-as.numeric(data3$RIDAGEYR>=65) #63 is the 3rd quarile for age.
data3$oldbioage<-as.numeric(data3$phenoage>=60) #60 is the 3rd quartile for bioage: Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  #5.625  30.252  45.080  46.115  60.386 109.968    1366 
data3$bioadvance<-as.numeric(data3$phenoage_advance>=0)
data3$income_recode1<-as.numeric(data3$income_recode>6) #35K or higher

4.1 Biological age versus age

corrplot(cor(data3[,c("CFDDS","cerad_sum", "CFDAST","z_digit_re","z_digit_edu", "z_digit_age","RIDAGEYR","phenoage")],use="complete.obs"),method = 'number')

cocor(~CFDDS + RIDAGEYR | CFDDS + phenoage, data = data3,
      test = c("hittner2003", "zou2007"))
## 
##   Results of a comparison of two overlapping correlations based on dependent groups
## 
## Comparison between r.jk (CFDDS, RIDAGEYR) = -0.284 and r.jh (CFDDS, phenoage) = -0.3285
## Difference: r.jk - r.jh = 0.0445
## Related correlation: r.kh = 0.8076
## Data: data3: j = CFDDS, k = RIDAGEYR, h = phenoage
## Group size: n = 2748
## Null hypothesis: r.jk is equal to r.jh
## Alternative hypothesis: r.jk is not equal to r.jh (two-sided)
## Alpha: 0.05
## 
## hittner2003: Hittner, May, and Silver's (2003) modification of Dunn and Clark's z (1969) using a backtransformed average Fisher's (1921) Z procedure
##   z = 3.9694, p-value = 0.0001
##   Null hypothesis rejected
## 
## zou2007: Zou's (2007) confidence interval
##   95% confidence interval for r.jk - r.jh: 0.0225 0.0665
##   Null hypothesis rejected (Interval does not include 0)
##missing data patterns;
#vis_miss(data3)
#gg_miss_var(data3[data3$RIDAGEYR>59,],show_pct = T)
plot(data3$RIDAGEYR,data3$phenoage,cex=0.2,xlab="Age",ylab="Phenoage")
abline(lm(data3$phenoage~data3$RIDAGEYR))

temp<-lm(data3$CFDDS~data3$RIDAGEYR)
temp1<-lm(data3$CFDDS~data3$phenoage)

summary(temp)
## 
## Call:
## lm(formula = data3$CFDDS ~ data3$RIDAGEYR)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.478 -12.129   0.833  12.060  58.447 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    96.27025    3.13681   30.69   <2e-16 ***
## data3$RIDAGEYR -0.73113    0.04493  -16.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.72 on 3009 degrees of freedom
##   (8306 observations deleted due to missingness)
## Multiple R-squared:  0.08088,    Adjusted R-squared:  0.08057 
## F-statistic: 264.8 on 1 and 3009 DF,  p-value: < 2.2e-16
summary(temp1)
## 
## Call:
## lm(formula = data3$CFDDS ~ data3$phenoage)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.084 -11.620   0.911  11.631  57.199 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    87.11014    2.26743   38.42   <2e-16 ***
## data3$phenoage -0.60560    0.03323  -18.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.38 on 2746 degrees of freedom
##   (8569 observations deleted due to missingness)
## Multiple R-squared:  0.1079, Adjusted R-squared:  0.1076 
## F-statistic: 332.2 on 1 and 2746 DF,  p-value: < 2.2e-16

4.2 Merge with medical history

Correlation between Digital Symbol score and bioage is significantly greater than with age.

R-squared for age predicting digital symbol score is 0.084, R-squared for biological age is 0.11.

5 Mediation analysis of effect of sbp on cognitive scores (digital symbol)

5.1 Distribution of outcomes, risk factor, mediators and moderators in the mediation analysis (2011-2014 cohort):

## Warning in st(data4, vars = c("RIDAGEYR", "Female", "race1", "race2", "race3", : Factor variables ignore custom summ options. Cols 1 and 2 are count and percentage.
## Beware combining factors with a custom summ unless factor.numeric = TRUE.
Summary Statistics
year
2011-2012
2013-2014
Variable Mean Sd Mean Sd
RIDAGEYR 69.1 6.8 69.7 6.73
Female 1059 1307
… 0 519 49% 636 48.7%
… 1 540 51% 671 51.3%
race1 1059 1307
… No 565 53.4% 605 46.3%
… Yes 494 46.6% 702 53.7%
race2 1059 1307
… No 787 74.3% 1058 80.9%
… Yes 272 25.7% 249 19.1%
race3 1059 1307
… No 866 81.8% 1066 81.6%
… Yes 193 18.2% 241 18.4%
race4 1059 1307
… No 959 90.6% 1192 91.2%
… Yes 100 9.4% 115 8.8%
edu_cat1 1059 1307
… 0 510 48.2% 606 46.4%
… 1 549 51.8% 701 53.6%
income_recode1 1059 1307
… 0 516 48.7% 609 46.6%
… 1 543 51.3% 698 53.4%
sbp 133 18.9 133 19.1
phenoage 66 9.31 68.9 9.27
bioadvance 1059 1307
… 0 807 76.2% 816 62.4%
… 1 252 23.8% 491 37.6%
z_global_re 0.126 2.33 0.351 2.31
z_digit_re 0.078 0.996 0.0536 0.965
z_animal_re 0.0965 1 0.0646 0.978
z_cerad_re -0.0482 0.918 0.233 0.937
bmi 29.2 6.33 29.1 6.36
## Warning in st(data4, vars = c("RIDAGEYR", "Female", "race1", "race2", "race3", : Factor variables ignore custom summ options. Cols 1 and 2 are count and percentage.
## Beware combining factors with a custom summ unless factor.numeric = TRUE.
Summary Statistics
Variable Mean Sd
RIDAGEYR 69.4 6.76
Female 2366
… 0 1155 48.8%
… 1 1211 51.2%
race1 2366
… No 1170 49.5%
… Yes 1196 50.5%
race2 2366
… No 1845 78%
… Yes 521 22%
race3 2366
… No 1932 81.7%
… Yes 434 18.3%
race4 2366
… No 2151 90.9%
… Yes 215 9.1%
edu_cat1 2366
… 0 1116 47.2%
… 1 1250 52.8%
income_recode1 2366
… 0 1125 47.5%
… 1 1241 52.5%
sbp 133 19
phenoage 67.6 9.39
bioadvance 2366
… 0 1623 68.6%
… 1 743 31.4%
z_global_re 0.251 2.32
z_digit_re 0.0645 0.979
z_animal_re 0.0789 0.988
z_cerad_re 0.107 0.939
bmi 29.2 6.35

5.2 Biological age as the mediator

Hypothesis: Health risk factor (hypertension) has an effect on cognitive score mediated through biological age.

Causal diagram. Mediation consists of three sets of regressions with \(M\): mediator; \(Y\): outcome; \(A\): exposure; \(C\): confounder

The indirect effect (mediation effect) is the product of paths \(b\)*\(c\). Direct effect is the path \(a\). Total effect is direct+indirect.

Mediation analysis package: https://cran.r-project.org/web/packages/mediation/vignettes/mediation.pdf

DiagrammeR::grViz("
digraph {
  splines=line;
  graph [ranksep = 0.2]
  node [shape = plaintext]
    A [label = 'Hypertension (A)', shape=box]
    Y [label = 'Cognitive Function (Y)', shape=box]
    M [label = 'Biological age (M)', shape = box]
  edge [minlen = 4]
    A->Y [xlabel= a]
    A->M [xlabel= b]
    M->Y [xlabel= c]
  { rank = same; A; Y }
}
")

Use weighted least squares regression. Use exam weights to adjust for survey sampling design to reflect the US population.

#outcome: CFDDS
data4<-data3[is.na(data3$sbp)==F&is.na(data3$CFDDS)==F&is.na(data3$WTMEC2YR)==F,]
model.m<-lm(phenoage~sbp+female+bmi+factor(income_recode1)+race2+race3+race4+edu_cat1, data=data4,weights=WTMEC2YR)
model.y<-lm(CFDDS~sbp+phenoage+female+bmi+factor(income_recode1)+race2+race3+race4+edu_cat1, data=data4,weights=WTMEC2YR)

set.seed(1234)
out.1 <- mediate(model.m, model.y, sims = 1000, treat = "sbp",mediator = "phenoage",weights=data$WTMEC2YR)

summary(model.m);summary(model.y);summary(out.1)
## 
## Call:
## lm(formula = phenoage ~ sbp + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4 + edu_cat1, data = data4, weights = WTMEC2YR)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -4760.7  -598.3   -17.5   792.2  5243.3 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             60.609646   1.591868  38.075  < 2e-16 ***
## sbp                      0.080454   0.009574   8.403  < 2e-16 ***
## female                  -2.649095   0.353238  -7.499 8.95e-14 ***
## bmi                      0.047378   0.028182   1.681  0.09287 .  
## factor(income_recode1)1 -3.506617   0.390116  -8.989  < 2e-16 ***
## race2TRUE               -2.707760   0.679185  -3.987 6.90e-05 ***
## race3TRUE               -3.850481   0.719443  -5.352 9.52e-08 ***
## race4TRUE               -2.314311   0.819560  -2.824  0.00478 ** 
## edu_cat1                -2.221327   0.386426  -5.748 1.01e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1182 on 2413 degrees of freedom
##   (496 observations deleted due to missingness)
## Multiple R-squared:  0.1076, Adjusted R-squared:  0.1047 
## F-statistic: 36.38 on 8 and 2413 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = CFDDS ~ sbp + phenoage + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4 + edu_cat1, data = data4, weights = WTMEC2YR)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -8569.5 -1002.6   -58.5   895.9 10490.6 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              92.11863    2.93054  31.434  < 2e-16 ***
## sbp                      -0.04118    0.01413  -2.914 0.003606 ** 
## phenoage                 -0.70685    0.02962 -23.863  < 2e-16 ***
## female                    3.47374    0.51993   6.681 2.93e-11 ***
## bmi                       0.15782    0.04103   3.846 0.000123 ***
## factor(income_recode1)1   5.43112    0.57706   9.412  < 2e-16 ***
## race2TRUE               -11.93700    0.99149 -12.039  < 2e-16 ***
## race3TRUE               -12.79819    1.05301 -12.154  < 2e-16 ***
## race4TRUE                -4.22155    1.19446  -3.534 0.000417 ***
## edu_cat1                  8.37418    0.56610  14.793  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1719 on 2412 degrees of freedom
##   (496 observations deleted due to missingness)
## Multiple R-squared:  0.4307, Adjusted R-squared:  0.4286 
## F-statistic: 202.8 on 9 and 2412 DF,  p-value: < 2.2e-16
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            -0.0571      -0.0718        -0.04  <2e-16 ***
## ADE             -0.0407      -0.0675        -0.01   0.004 ** 
## Total Effect    -0.0977      -0.1265        -0.07  <2e-16 ***
## Prop. Mediated   0.5841       0.4335         0.81  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2422 
## 
## 
## Simulations: 1000
plot(out.1,xlim=c(-0.12,0))

sens0 <- medsens(out.1, rho.by = 0.1)
plot(sens0)

summary(sens0)
## 
## Mediation Sensitivity Analysis for Average Causal Mediation Effect
## 
## Rho at which ACME = 0: -0.4
## R^2_M*R^2_Y* at which ACME = 0: 0.16
## R^2_M~R^2_Y~ at which ACME = 0: 0.0813

5.2.1 Analysis Results

Biological age is a significant mediator because 58% of sbp’s effect is mediated through biological age. The mediation effect is 0.055 (95% CI: -0.0685, -0.04) with a p-value of less than 0.001.

5.3 Moderated mediation analysis by the state of biological aging. Biologically older (bioage>=age) versus biologically younger (bioage<age)

data4<-data3[is.na(data3$sbp)==F&is.na(data3$CFDDS)==F&is.na(data3$WTMEC2YR)==F,]
moderator<-data4$bioadvance
model.m0<-lm(phenoage~sbp*moderator+female+bmi+factor(income_recode1)+race2+race3+race4, data=data4,weights=WTMEC2YR)
model.y0<-lm(CFDDS~phenoage+sbp*moderator+phenoage*moderator+female+bmi+factor(income_recode1)+race2+race3+race4, data=data4,weights=WTMEC2YR)

set.seed(1234)
out.edu0 <- mediate(model.m0, model.y0, sims = 1000, treat = "sbp",mediator = "phenoage",covariates = list(moderator = 0), weights=data4$WTMEC2YR)
set.seed(1234)
out.edu1 <- mediate(model.m0, model.y0, sims = 1000, treat = "sbp",mediator = "phenoage",covariates = list(moderator = 1), weights=data4$WTMEC2YR)

print("Biologically younger (bioage<age)"); summary(out.edu0)
## [1] "Biologically younger (bioage<age)"
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
## (Inference Conditional on the Covariate Values Specified in `covariates')
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            -0.0768      -0.0959        -0.06  <2e-16 ***
## ADE             -0.0523      -0.0865        -0.02   0.002 ** 
## Total Effect    -0.1290      -0.1666        -0.09  <2e-16 ***
## Prop. Mediated   0.5927       0.4483         0.84  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2422 
## 
## 
## Simulations: 1000
print("Biologically older (bioage>=age)"); summary(out.edu1)
## [1] "Biologically older (bioage>=age)"
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
## (Inference Conditional on the Covariate Values Specified in `covariates')
## 
##                Estimate 95% CI Lower 95% CI Upper p-value   
## ACME            -0.0331      -0.0565        -0.01   0.002 **
## ADE             -0.0220      -0.0714         0.03   0.390   
## Total Effect    -0.0551      -0.1106         0.00   0.048 * 
## Prop. Mediated   0.5827       0.0169         2.44   0.050 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2422 
## 
## 
## Simulations: 1000
#sens0 <- medsens(out.edu0, rho.by = 0.1)
#plot(sens0)
#summary(sens0)

par(mfrow=c(1,2))
plot(out.edu0,main="Biologically Younger",xlim=c(-0.15,0.05))
plot(out.edu1,main="Biologically Older",xlim=c(-0.15,0.05))

#test whether mediation effect is different

set.seed(1234)
med.init <- mediate(model.m0, model.y0, treat = "sbp", mediator = "phenoage", sims=2)
test.modmed(med.init, covariates.1 = list(moderator = 1),                      covariates.2 = list(moderator = 0), sims = 1000)
## 
##  Test of ACME(covariates.1) - ACME(covariates.2) = 0
## 
## data:  estimates from med.init
## ACME(covariates.1) - ACME(covariates.2) = 0.044234, p-value = 0.002
## alternative hypothesis: true ACME(covariates.1) - ACME(covariates.2) is not equal to 0
## 95 percent confidence interval:
##  0.01543128 0.07190162
## 
## 
##  Test of ADE(covariates.1) - ADE(covariates.2) = 0
## 
## data:  estimates from med.init
## ADE(covariates.1) - ADE(covariates.2) = 0.032374, p-value = 0.314
## alternative hypothesis: true ADE(covariates.1) - ADE(covariates.2) is not equal to 0
## 95 percent confidence interval:
##  -0.02959746  0.09527645

5.3.1 Interpreation and summary of moderated mediation analysis:

When a subject is biologically younger (biological age<age), the effect of sbp is more strongly mediated through biological age compared to when a subject is biologically older (biological age>age). This implies that when a subject is biologically older, there are other potential pathways where sbp affects cognitive score.

5.4 Education as potential mediator

There might be two issues: 1. education was measured before SBP and cognition; 2. weights are not fitted correctly in the logistic regression when education is the categorical outcome.

#mediator: education. Two issues: education measured before sbp. Also, cannot include weights. 

model.m<-glm(factor(edu_cat1)~sbp+female+bmi+factor(income_recode1)+race2+race3+race4,family = binomial(link = "logit"), data=data4)
#model.m<-polr(factor(edu_cat)~sbp+female+bmi+factor(income_recode)+race2+race3+race4, method= "logistic", data=data4, Hess=T) #cannot include weights
#model.m<-lm(edu_cat1~sbp+female+bmi+factor(income_recode)+race2+race3+race4, data=data4,weights=WTMEC2YR)
model.y<-lm(CFDDS~sbp+edu_cat1+female+bmi+factor(income_recode1)+race2+race3+race4, data=data4)

set.seed(1234)
out.2 <- mediate(model.m, model.y, sims = 1000, treat = "sbp",mediator = "edu_cat1")

summary(model.m);summary(model.y);summary(out.2)
## 
## Call:
## glm(formula = factor(edu_cat1) ~ sbp + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4, family = binomial(link = "logit"), 
##     data = data4)
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              0.705830   0.371746   1.899  0.05761 .  
## sbp                     -0.007281   0.002229  -3.266  0.00109 ** 
## female                   0.093036   0.084905   1.096  0.27318    
## bmi                      0.001156   0.006831   0.169  0.86558    
## factor(income_recode1)1  1.207708   0.084480  14.296  < 2e-16 ***
## race2TRUE               -0.726288   0.105050  -6.914 4.72e-12 ***
## race3TRUE               -1.011961   0.116809  -8.663  < 2e-16 ***
## race4TRUE                0.133661   0.154146   0.867  0.38588    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3651.0  on 2636  degrees of freedom
## Residual deviance: 3267.3  on 2629  degrees of freedom
##   (281 observations deleted due to missingness)
## AIC: 3283.3
## 
## Number of Fisher Scoring iterations: 4
## 
## Call:
## lm(formula = CFDDS ~ sbp + edu_cat1 + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4, data = data4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.448  -9.501  -0.257   9.522  51.033 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             45.68999    2.50081  18.270  < 2e-16 ***
## sbp                     -0.09117    0.01470  -6.200 6.54e-10 ***
## edu_cat1                11.16471    0.60160  18.558  < 2e-16 ***
## female                   5.02311    0.56258   8.929  < 2e-16 ***
## bmi                      0.13018    0.04556   2.857   0.0043 ** 
## factor(income_recode1)1  7.29053    0.59091  12.338  < 2e-16 ***
## race2TRUE               -7.52836    0.71499 -10.529  < 2e-16 ***
## race3TRUE               -7.89788    0.78185 -10.102  < 2e-16 ***
## race4TRUE               -1.98659    1.00616  -1.974   0.0484 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.32 on 2628 degrees of freedom
##   (281 observations deleted due to missingness)
## Multiple R-squared:  0.317,  Adjusted R-squared:  0.3149 
## F-statistic: 152.5 on 8 and 2628 DF,  p-value: < 2.2e-16
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME            -0.0123      -0.2593         0.25    0.92    
## ADE             -0.0911      -0.1212        -0.06  <2e-16 ***
## Total Effect    -0.1034      -0.3510         0.16    0.40    
## Prop. Mediated   0.4660      -6.2343         6.47    0.52    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2637 
## 
## 
## Simulations: 1000
plot(out.2)

5.4.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.1

6 Global cognition z-score as the outcome, history of hypertension as exposure, Phenoage as mediator

Race adjusted global cognition z-score was created as the sum of the z-scores for the following: Sum of the CERAD scores (Trial 1 Recall, Trial 2 Recall, Trial 3 Recall, and Delayed Recall), Animal Fluency, and Digit Symbol. Exposure updated to ‘History of hypertension’. Adjusted for confounders: sex, race, education, BMI, history of chest pain (cardiovascular health), history of T2D, ever smoking.

6.1 Analysis Results

#outcome: z score global
library(broom)
data4<-data3[is.na(data3$BPQ020)==F&data3$BPQ020!=9&data3$CDQ001!=9&data3$DIQ010!=9&data3$SMQ020!=9&is.na(data3$z_global_re)==F&is.na(data3$phenoage)==F&is.na(data3$WTMEC2YR)==F&is.na(data3$female)==F&is.na(data3$income_recode1)==F&is.na(data3$race2)==F&is.na(data3$edu_cat1)==F&is.na(data3$race3)==F&is.na(data3$race4)==F&is.na(data3$bmi)==F,]
data4$hbp<-data4$BPQ020; 
data4[data4$BPQ020==1,'hbp']<-1;data4[data4$BPQ020==2,'hbp']<-0
data4<- data4 %>% 
  rename(
    "Chest_Pain"="CDQ001","T2D"="DIQ010","Smoking"="SMQ020"
  )
data4<-mutate(data4,Chest_Pain = recode(Chest_Pain, "1"="Yes", "2"="No"))
data4<-mutate(data4,T2D = recode(T2D, "1"="Yes", "3"="Borderline", "2"="No"));data4$T2D <- relevel(factor(data4$T2D), ref = "No")
data4<-mutate(data4, Smoking= recode(Smoking, "1"="Yes", "2"="No"))
data4<-data4[is.na(data4$Chest_Pain)==F&is.na(data4$Smoking)==F&is.na(data4$T2D)==F,]
#temp<-data4[,c("hbp","female","bmi","income_recode1","race2","race3","race4","edu_cat1","Chest_Pain",              "T2D","Smoking","phenoage","z_global_re","WTMEC2YR")]

model.total<-lm(z_global_re~hbp+female+bmi+factor(income_recode1)+race2+race3+race4+edu_cat1+factor(Chest_Pain)+factor(T2D)+factor(Smoking), data=data4,weights=WTMEC2YR)
model.m<-lm(phenoage~hbp+female+bmi+factor(income_recode1)+race2+race3+race4+edu_cat1+factor(Chest_Pain)+factor(T2D)+factor(Smoking), 
            data=data4,weights=WTMEC2YR)
model.y<-lm(z_global_re~hbp+phenoage+female+bmi+factor(income_recode1)+race2+race3+race4+edu_cat1+factor(Chest_Pain)+factor(T2D)+factor(Smoking), data=data4,weights=WTMEC2YR)

write.csv(file="results csv/lm_mediation_0.csv", cbind(tidy(model.m)[,1],round(tidy(model.m)[,2:5],3), round(confint(model.m),3)))
write.csv(file="results csv/lm_outcome_0.csv", cbind(tidy(model.y)[,1],round(tidy(model.y)[,2:5],3), round(confint(model.y),3)))
write.csv(file="results csv/lm_total_0.csv", cbind(tidy(model.total)[,1],round(tidy(model.total)[,2:5],3), round(confint(model.total),3)))

set.seed(1234)
out.2 <- mediate(model.m, model.y, sims = 1000, treat = "hbp",mediator = "phenoage",weights=data$WTMEC2YR)
summary(model.total);summary(model.m);summary(model.y);summary(out.2)
## 
## Call:
## lm(formula = z_global_re ~ hbp + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4 + edu_cat1 + factor(Chest_Pain) + factor(T2D) + 
##     factor(Smoking), data = data4, weights = WTMEC2YR)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1677.06  -163.86    -8.89   134.65  1751.79 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -1.76167    0.23426  -7.520 7.67e-14 ***
## hbp                     -0.53548    0.08862  -6.042 1.76e-09 ***
## female                   0.54765    0.08690   6.302 3.48e-10 ***
## bmi                      0.03318    0.00701   4.734 2.33e-06 ***
## factor(income_recode1)1  1.01377    0.09404  10.780  < 2e-16 ***
## race2TRUE                0.31290    0.16361   1.913   0.0559 .  
## race3TRUE                0.37593    0.17398   2.161   0.0308 *  
## race4TRUE                0.01972    0.20028   0.098   0.9216    
## edu_cat1                 1.40480    0.09356  15.014  < 2e-16 ***
## factor(Chest_Pain)Yes   -0.15386    0.09830  -1.565   0.1177    
## factor(T2D)Borderline   -0.12931    0.21069  -0.614   0.5395    
## factor(T2D)Yes          -0.49482    0.11393  -4.343 1.46e-05 ***
## factor(Smoking)Yes      -0.03811    0.08623  -0.442   0.6586    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 284.9 on 2408 degrees of freedom
## Multiple R-squared:  0.2039, Adjusted R-squared:    0.2 
## F-statistic: 51.41 on 12 and 2408 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = phenoage ~ hbp + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4 + edu_cat1 + factor(Chest_Pain) + factor(T2D) + 
##     factor(Smoking), data = data4, weights = WTMEC2YR)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -5857.5  -581.4     0.7   794.6  5113.9 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             70.87570    0.96012  73.820  < 2e-16 ***
## hbp                      2.97340    0.36323   8.186 4.33e-16 ***
## female                  -2.13053    0.35615  -5.982 2.53e-09 ***
## bmi                     -0.06551    0.02873  -2.280  0.02268 *  
## factor(income_recode1)1 -3.36347    0.38542  -8.727  < 2e-16 ***
## race2TRUE               -2.94799    0.67055  -4.396 1.15e-05 ***
## race3TRUE               -3.87328    0.71308  -5.432 6.14e-08 ***
## race4TRUE               -2.90458    0.82084  -3.539  0.00041 ***
## edu_cat1                -1.83630    0.38347  -4.789 1.78e-06 ***
## factor(Chest_Pain)Yes    0.77797    0.40287   1.931  0.05359 .  
## factor(T2D)Borderline    0.53488    0.86351   0.619  0.53569    
## factor(T2D)Yes           4.06459    0.46696   8.704  < 2e-16 ***
## factor(Smoking)Yes       0.56223    0.35341   1.591  0.11177    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1168 on 2408 degrees of freedom
## Multiple R-squared:  0.142,  Adjusted R-squared:  0.1377 
## F-statistic:  33.2 on 12 and 2408 DF,  p-value: < 2.2e-16
## 
## Call:
## lm(formula = z_global_re ~ hbp + phenoage + female + bmi + factor(income_recode1) + 
##     race2 + race3 + race4 + edu_cat1 + factor(Chest_Pain) + factor(T2D) + 
##     factor(Smoking), data = data4, weights = WTMEC2YR)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1529.28  -147.66    -4.53   138.25  1508.14 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.602187   0.382957  14.629  < 2e-16 ***
## hbp                     -0.226551   0.081312  -2.786  0.00537 ** 
## phenoage                -0.103898   0.004500 -23.090  < 2e-16 ***
## female                   0.326296   0.079223   4.119 3.94e-05 ***
## bmi                      0.026378   0.006351   4.153 3.39e-05 ***
## factor(income_recode1)1  0.664314   0.086439   7.685 2.21e-14 ***
## race2TRUE                0.006614   0.148656   0.044  0.96452    
## race3TRUE               -0.026500   0.158415  -0.167  0.86716    
## race4TRUE               -0.282064   0.181719  -1.552  0.12075    
## edu_cat1                 1.214008   0.085076  14.270  < 2e-16 ***
## factor(Chest_Pain)Yes   -0.073028   0.089026  -0.820  0.41213    
## factor(T2D)Borderline   -0.073732   0.190686  -0.387  0.69904    
## factor(T2D)Yes          -0.072518   0.104719  -0.693  0.48869    
## factor(Smoking)Yes       0.020304   0.078077   0.260  0.79484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 257.9 on 2407 degrees of freedom
## Multiple R-squared:  0.3483, Adjusted R-squared:  0.3448 
## F-statistic: 98.95 on 13 and 2407 DF,  p-value: < 2.2e-16
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             -0.309       -0.392        -0.23  <2e-16 ***
## ADE              -0.232       -0.385        -0.09  <2e-16 ***
## Total Effect     -0.541       -0.707        -0.38  <2e-16 ***
## Prop. Mediated    0.573        0.424         0.80  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2421 
## 
## 
## Simulations: 1000
sens0 <- medsens(out.2, rho.by = 0.1)

par(mfrow=c(1,2))
plot(out.2,xlim=c(-0.8,0))
plot(sens0)

summary(sens0)
## 
## Mediation Sensitivity Analysis for Average Causal Mediation Effect
## 
## Sensitivity Region
## 
##       Rho    ACME 95% CI Lower 95% CI Upper R^2_M*R^2_Y* R^2_M~R^2_Y~
## [1,] -0.4 -0.0236      -0.0504       0.0031         0.16       0.0895
## 
## Rho at which ACME = 0: -0.4
## R^2_M*R^2_Y* at which ACME = 0: 0.16
## R^2_M~R^2_Y~ at which ACME = 0: 0.0895
##extract results;
extract_mediation_summary <- function (x) { 

  clp <- 100 * x$conf.level
  isLinear.y <- ((class(x$model.y)[1] %in% c("lm", "rq")) || 
                   (inherits(x$model.y, "glm") && x$model.y$family$family == 
                      "gaussian" && x$model.y$family$link == "identity") || 
                   (inherits(x$model.y, "survreg") && x$model.y$dist == 
                      "gaussian"))

  printone <- !x$INT && isLinear.y

  if (printone) {

    smat <- c(x$d1, x$d1.ci, x$d1.p)
    smat <- rbind(smat, c(x$z0, x$z0.ci, x$z0.p))
    smat <- rbind(smat, c(x$tau.coef, x$tau.ci, x$tau.p))
    smat <- rbind(smat, c(x$n0, x$n0.ci, x$n0.p))

    rownames(smat) <- c("ACME", "ADE", "Total Effect", "Prop. Mediated")

  } else {
    smat <- c(x$d0, x$d0.ci, x$d0.p)
    smat <- rbind(smat, c(x$d1, x$d1.ci, x$d1.p))
    smat <- rbind(smat, c(x$z0, x$z0.ci, x$z0.p))
    smat <- rbind(smat, c(x$z1, x$z1.ci, x$z1.p))
    smat <- rbind(smat, c(x$tau.coef, x$tau.ci, x$tau.p))
    smat <- rbind(smat, c(x$n0, x$n0.ci, x$n0.p))
    smat <- rbind(smat, c(x$n1, x$n1.ci, x$n1.p))
    smat <- rbind(smat, c(x$d.avg, x$d.avg.ci, x$d.avg.p))
    smat <- rbind(smat, c(x$z.avg, x$z.avg.ci, x$z.avg.p))
    smat <- rbind(smat, c(x$n.avg, x$n.avg.ci, x$n.avg.p))

    rownames(smat) <- c("ACME (control)", "ACME (treated)", 
                        "ADE (control)", "ADE (treated)", "Total Effect", 
                        "Prop. Mediated (control)", "Prop. Mediated (treated)", 
                        "ACME (average)", "ADE (average)", "Prop. Mediated (average)")

  }

  colnames(smat) <- c("Estimate", paste(clp, "% CI Lower", sep = ""), 
                      paste(clp, "% CI Upper", sep = ""), "p-value")
  smat
}

write.csv(extract_mediation_summary(out.2),file="results csv/med_main.csv")

##Baseline summary statistics for paper:

## Warning in st(data4, vars = c("RIDAGEYR", "Female", "race1", "race2", "race3", : Factor variables ignore custom summ options. Cols 1 and 2 are count and percentage.
## Beware combining factors with a custom summ unless factor.numeric = TRUE.
##          Variable      Mean    Sd      Mean    Sd
## 1            year 2011-2012       2013-2014      
## 2        RIDAGEYR      69.1  6.78      69.7  6.74
## 3          Female      1079            1342      
## 4           ... 0       528 48.9%       647 48.2%
## 5           ... 1       551 51.1%       695 51.8%
## 6           race1      1079            1342      
## 7          ... No       577 53.5%       621 46.3%
## 8         ... Yes       502 46.5%       721 53.7%
## 9           race2      1079            1342      
## 10         ... No       800 74.1%      1088 81.1%
## 11        ... Yes       279 25.9%       254 18.9%
## 12          race3      1079            1342      
## 13         ... No       883 81.8%      1094 81.5%
## 14        ... Yes       196 18.2%       248 18.5%
## 15          race4      1079            1342      
## 16         ... No       977 90.5%      1223 91.1%
## 17        ... Yes       102  9.5%       119  8.9%
## 18       edu_cat1      1079            1342      
## 19          ... 0       519 48.1%       623 46.4%
## 20          ... 1       560 51.9%       719 53.6%
## 21 income_recode1      1079            1342      
## 22          ... 0       527 48.8%       626 46.6%
## 23          ... 1       552 51.2%       716 53.4%
## 24            sbp       133  18.9       133  19.1
## 25       phenoage        66  9.26      68.9  9.34
## 26     bioadvance      1079            1342      
## 27          ... 0       824 76.4%       835 62.2%
## 28          ... 1       255 23.6%       507 37.8%
## 29    z_global_re     0.129  2.33     0.331  2.32
## 30     z_digit_re    0.0763 0.991    0.0449 0.969
## 31    z_animal_re    0.0977     1     0.062 0.975
## 32     z_cerad_re   -0.0447 0.917     0.224 0.945
## 33            bmi      29.3  6.37      29.1  6.38
## 34          hyper      1079            1342      
## 35          ... 0       421   39%       497   37%
## 36          ... 1       658   61%       845   63%
## 37            T2D      1079            1342      
## 38         ... No       798   74%       969 72.2%
## 39 ... Borderline        38  3.5%        78  5.8%
## 40        ... Yes       243 22.5%       295   22%
## 41     Chest_Pain      1079            1342      
## 42         ... No       824 76.4%       995 74.1%
## 43        ... Yes       255 23.6%       347 25.9%
## 44        Smoking      1079            1342      
## 45         ... No       535 49.6%       656 48.9%
## 46        ... Yes       544 50.4%       686 51.1%
## Warning in st(data4, vars = c("RIDAGEYR", "Female", "race1", "race2", "race3", : Factor variables ignore custom summ options. Cols 1 and 2 are count and percentage.
## Beware combining factors with a custom summ unless factor.numeric = TRUE.
##          Variable   Mean    Sd
## 1        RIDAGEYR   69.4  6.76
## 2          Female   2421      
## 3           ... 0   1175 48.5%
## 4           ... 1   1246 51.5%
## 5           race1   2421      
## 6          ... No   1198 49.5%
## 7         ... Yes   1223 50.5%
## 8           race2   2421      
## 9          ... No   1888   78%
## 10        ... Yes    533   22%
## 11          race3   2421      
## 12         ... No   1977 81.7%
## 13        ... Yes    444 18.3%
## 14          race4   2421      
## 15         ... No   2200 90.9%
## 16        ... Yes    221  9.1%
## 17       edu_cat1   2421      
## 18          ... 0   1142 47.2%
## 19          ... 1   1279 52.8%
## 20 income_recode1   2421      
## 21          ... 0   1153 47.6%
## 22          ... 1   1268 52.4%
## 23            sbp    133    19
## 24       phenoage   67.6  9.42
## 25     bioadvance   2421      
## 26          ... 0   1659 68.5%
## 27          ... 1    762 31.5%
## 28    z_global_re  0.241  2.33
## 29     z_digit_re 0.0589 0.979
## 30    z_animal_re 0.0779 0.986
## 31     z_cerad_re  0.104 0.942
## 32            bmi   29.2  6.38
## 33          hyper   2421      
## 34          ... 0    918 37.9%
## 35          ... 1   1503 62.1%
## 36            T2D   2421      
## 37         ... No   1767   73%
## 38 ... Borderline    116  4.8%
## 39        ... Yes    538 22.2%
## 40     Chest_Pain   2421      
## 41         ... No   1819 75.1%
## 42        ... Yes    602 24.9%
## 43        Smoking   2421      
## 44         ... No   1191 49.2%
## 45        ... Yes   1230 50.8%

6.2 Moderated mediation analysis by biologically old (bioage>=age) versus biologically young (bioage<age)

moderator<-data4$bioadvance
model.m0<-lm(phenoage~hbp*moderator+female+bmi+factor(income_recode1)+edu_cat1+factor(Chest_Pain)+factor(T2D)+factor(Smoking), 
             data=data4,weights=WTMEC2YR)
model.y0<-lm(z_global_re~phenoage+hbp*moderator+phenoage*moderator+female+bmi+factor(income_recode1)+edu_cat1+factor(Chest_Pain)+factor(T2D)+factor(Smoking), 
             data=data4,weights=WTMEC2YR)

set.seed(1234)
out.edu0 <- mediate(model.m0, model.y0, sims = 1000, treat = "hbp",mediator = "phenoage",covariates = list(moderator = 0), weights=data4$WTMEC2YR)
set.seed(1234)
out.edu1 <- mediate(model.m0, model.y0, sims = 1000, treat = "hbp",mediator = "phenoage",covariates = list(moderator = 1), weights=data4$WTMEC2YR)

print("Delayed Biological Aging (bioage<age)"); summary(out.edu0)
## [1] "Delayed Biological Aging (bioage<age)"
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
## (Inference Conditional on the Covariate Values Specified in `covariates')
## 
##                Estimate 95% CI Lower 95% CI Upper p-value    
## ACME             -0.309       -0.404        -0.21  <2e-16 ***
## ADE              -0.280       -0.469        -0.09   0.002 ** 
## Total Effect     -0.590       -0.792        -0.39  <2e-16 ***
## Prop. Mediated    0.525        0.361         0.77  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2421 
## 
## 
## Simulations: 1000
print("Accelerated Biological Aging (bioage>=age)"); summary(out.edu1)
## [1] "Accelerated Biological Aging (bioage>=age)"
## 
## Causal Mediation Analysis 
## 
## Quasi-Bayesian Confidence Intervals
## 
## (Inference Conditional on the Covariate Values Specified in `covariates')
## 
##                Estimate 95% CI Lower 95% CI Upper p-value  
## ACME            -0.1343      -0.2604        -0.02    0.02 *
## ADE             -0.0414      -0.3234         0.27    0.81  
## Total Effect    -0.1757      -0.4919         0.15    0.31  
## Prop. Mediated   0.5414      -6.8068         6.31    0.32  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Sample Size Used: 2421 
## 
## 
## Simulations: 1000
par(mfrow=c(1,2))
plot(out.edu0,main="Delayed Biological Aging",xlim=c(-0.8,0.3))
plot(out.edu1,main="Accelerated Biological Aging",xlim=c(-0.8,0.3))

write.csv(extract_mediation_summary(out.edu0),file="results csv/med_delayed.csv")
write.csv(extract_mediation_summary(out.edu1),file="results csv/med_accl.csv")

set.seed(1234)
med.init <- mediate(model.m0, model.y0, treat = "hbp", mediator = "phenoage", sims=2)
test.modmed(med.init, covariates.1 = list(moderator = 1),                      covariates.2 = list(moderator = 0), sims = 1000)
## 
##  Test of ACME(covariates.1) - ACME(covariates.2) = 0
## 
## data:  estimates from med.init
## ACME(covariates.1) - ACME(covariates.2) = 0.17352, p-value = 0.028
## alternative hypothesis: true ACME(covariates.1) - ACME(covariates.2) is not equal to 0
## 95 percent confidence interval:
##  0.01744543 0.33353201
## 
## 
##  Test of ADE(covariates.1) - ADE(covariates.2) = 0
## 
## data:  estimates from med.init
## ADE(covariates.1) - ADE(covariates.2) = 0.22909, p-value = 0.216
## alternative hypothesis: true ADE(covariates.1) - ADE(covariates.2) is not equal to 0
## 95 percent confidence interval:
##  -0.1296796  0.6077913

6.2.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.1

###not to consider the rest. only exploratory. # Phenoage as exposure and sbp as mediator

#outcome: z score global
library(broom)
data4<-data3[is.na(data3$sbp)==F&is.na(data3$z_global_re)==F&is.na(data3$WTMEC2YR)==F,]
data4$sbp1<-data4$sbp/sd(data4$sbp)

model.total<-lm(z_global_re~phenoage+female+income_recode1+edu_cat1+bmi, data=data4,weights=WTMEC2YR)
model.m<-lm(sbp~phenoage+female+income_recode1+edu_cat1+bmi, data=data4,weights=WTMEC2YR)
model.y<-lm(z_global_re~sbp+phenoage+female+income_recode1+edu_cat1+bmi, data=data4,weights=WTMEC2YR)

set.seed(1234)
out.2 <- mediate(model.m, model.y, sims = 1000, treat = "phenoage",mediator = "sbp",weights=data$WTMEC2YR)

summary(model.total);summary(model.m);summary(model.y);summary(out.2)

plot(out.2,xlim=c(-0.13,0))

sens0 <- medsens(out.2, rho.by = 0.1)
plot(sens0)
summary(sens0)
model.total<-lm(z_global_re~edu_cat1+sbp1+female+income_recode1+bmi, data=data4,weights=WTMEC2YR)
model.m<-lm(phenoage~edu_cat1+sbp1+female+income_recode1+bmi, data=data4,weights=WTMEC2YR)
model.y<-lm(z_global_re~edu_cat1+phenoage+sbp1+female+income_recode1+bmi, data=data4,weights=WTMEC2YR)

set.seed(1234)
out.2 <- mediate(model.m, model.y, sims = 1000, treat = "edu_cat1",mediator = "phenoage",weights=data4$WTMEC2YR)

summary(model.total);summary(model.m);summary(model.y);summary(out.2)
plot(out.2,xlim=c(0,2))

sens0 <- medsens(out.2, rho.by = 0.1)
plot(sens0)
summary(sens0)
moderator<-data4$bioadvance
model.m0<-lm(phenoage~edu_cat1*moderator+sbp1+female+bmi+factor(income_recode1), data=data4,weights=WTMEC2YR)
model.y0<-lm(z_global_re~phenoage+edu_cat1*moderator+phenoage*moderator+sbp1+female+bmi+factor(income_recode1), data=data4,weights=WTMEC2YR)

set.seed(1234)
out.edu0 <- mediate(model.m0, model.y0, sims = 1000, treat = "edu_cat1",mediator = "phenoage",covariates = list(moderator = 0), weights=data4$WTMEC2YR)
set.seed(1234)
out.edu1 <- mediate(model.m0, model.y0, sims = 1000, treat = "edu_cat1",mediator = "phenoage",covariates = list(moderator = 1), weights=data4$WTMEC2YR)

print("Delayed Biological Aging (bioage<age)"); summary(out.edu0)
print("Accelerated Biological Aging (bioage>=age)"); summary(out.edu1)

par(mfrow=c(1,2))
plot(out.edu1,main="Accelerated Biological Aging",xlim=c(0,2))
plot(out.edu0,main="Delayed Biological Aging",xlim=c(0,2))

#sens0 <- medsens(out.edu0, rho.by = 0.1)
#plot(sens0); summary(sens0)

#sens1 <- medsens(out.edu1, rho.by = 0.1)
#plot(sens1); #summary(sens1)

#test whether mediation effect is different
set.seed(1234)
med.init <- mediate(model.m0, model.y0, treat = "edu_cat1", mediator = "phenoage", sims=2)
test.modmed(med.init, covariates.1 = list(moderator = 1),                      covariates.2 = list(moderator = 0), sims = 1000)
# education as exposure, stratified, similar
data5_1<-data4[data4$bioadvance==1,]
model.total<-lm(z_global_re~edu_cat1+sbp1+female+income_recode1+bmi, data=data5_1,weights=WTMEC2YR)
model.m<-lm(phenoage~edu_cat1+sbp1+female+income_recode1+bmi, data=data5_1,weights=WTMEC2YR)
model.y<-lm(z_global_re~edu_cat1+phenoage+sbp1+female+income_recode1+bmi, data=data5_1,weights=WTMEC2YR)

set.seed(1234)
out.2 <- mediate(model.m, model.y, sims = 1000, treat = "edu_cat1",mediator = "phenoage",weights=data5_1$WTMEC2YR)

summary(model.total);summary(model.m);summary(model.y);summary(out.2)


data5_2<-data4[data4$bioadvance==0,]
model.total<-lm(z_global_re~edu_cat1+sbp1+female+income_recode1+bmi, data=data5_2,weights=WTMEC2YR)
model.m<-lm(phenoage~edu_cat1+sbp1+female+income_recode1+bmi, data=data5_2,weights=WTMEC2YR)
model.y<-lm(z_global_re~edu_cat1+phenoage+sbp1+female+income_recode1+bmi, data=data5_2,weights=WTMEC2YR)

set.seed(1234)
out.3 <- mediate(model.m, model.y, sims = 1000, treat = "edu_cat1",mediator = "phenoage",weights=data5_2$WTMEC2YR)

summary(model.total);summary(model.m);summary(model.y);summary(out.3)

par(mfrow=c(1,2))
plot(out.2,main="Accelerated Biological Aging",xlim=c(0,2));plot(out.3,main="Delayed Biological Aging",xlim=c(0,2))